library(gridExtra)
library(ggplot2)
library(dplyr)
The following plots were created on May 25, 2017, to understand linear transformations of the independent variable to better fit linear relationships to non-linear data.
# Baseline data: 1000 uniform x values on [0, 100] and a straight-line response
# (slope 1.2) with Gaussian noise. The seed makes every figure reproducible.
set.seed(123)
lin_data <- data.frame(x = runif(n = 1000, min = 0, max = 100))
lin_data$y <- rnorm(n = 1000, mean = 10, sd = 10) + 1.2 * lin_data$x

# Centre panel used in every row of the figure: raw points, a loess smoother
# (red) and an ordinary least-squares line (blue).
mid <- ggplot(data = lin_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = x") +
  theme(plot.title = element_text(hjust = 0.5))
# Row 1, left panel: identity relationship. The noise added to y has a mean of
# 25% and a standard deviation of 12.5% of the largest noiseless y value.
left_data <- within(data.frame(x = lin_data$x), {
  y <- x # Change here
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
# `fit` always holds the most recent model; `line_fit` keeps this one by name.
fit <- lm(y ~ x, data = left_data)
line_fit <- fit
line <- ggplot(data = left_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = x") +
  theme(plot.title = element_text(hjust = 0.8))

# Row 1, right panel: reciprocal relationship with the same noise recipe.
right_data <- within(data.frame(x = lin_data$x), {
  y <- 1 / x # Change here
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
fit <- lm(y ~ x, data = right_data)
recip_fit <- fit
recip <- ggplot(data = right_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = 1/(x) \n <---1/(x) Transform----")

# Draw the row: transformed panels on either side of the linear reference.
grid.arrange(grobs = list(line, mid, recip), ncol = 3)
# Row 2, left panel: logarithmic relationship. The noise added to y has a mean
# of 25% and a standard deviation of 12.5% of the largest noiseless y value.
left_data <- within(data.frame(x = lin_data$x), {
  y <- log(x)
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
# `fit` always holds the most recent model; `log_fit` keeps this one by name.
fit <- lm(y ~ x, data = left_data)
log_fit <- fit
# NOTE: the plot objects `log` and `exp` share their names with base functions.
# Calls such as log(x) still work because R skips non-function bindings when
# resolving a function call.
log <- ggplot(data = left_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = log(x) \n ---Log(x) Transform--->") +
  theme(plot.title = element_text(hjust = 0.8))

# Row 2, right panel: exponential relationship with the same noise recipe.
right_data <- within(data.frame(x = lin_data$x), {
  y <- exp(x)
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
fit <- lm(y ~ x, data = right_data)
exp_fit <- fit
exp <- ggplot(data = right_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = e^x \n <---Exp(x) Transform----")

# Draw the row: transformed panels on either side of the linear reference.
grid.arrange(grobs = list(log, mid, exp), ncol = 3)
# Row 3, left panel: quadratic relationship. The noise added to y has a mean of
# 25% and a standard deviation of 12.5% of the largest noiseless y value.
left_data <- within(data.frame(x = lin_data$x), {
  y <- x^2 # Change here
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
# `fit` always holds the most recent model; `square_fit` keeps this one by name.
fit <- lm(y ~ x, data = left_data)
square_fit <- fit
square <- ggplot(data = left_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = x^2 \n ---x^2 Transform--->") +
  theme(plot.title = element_text(hjust = 0.8))

# Row 3, right panel: square-root relationship with the same noise recipe.
right_data <- within(data.frame(x = lin_data$x), {
  y <- sqrt(x) # Change here
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
fit <- lm(y ~ x, data = right_data)
sqrt_fit <- fit
# NOTE: the plot object `sqrt` shares its name with the base function; calls to
# sqrt() still resolve to the function.
sqrt <- ggplot(data = right_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = sqrt(x) \n <---sqrt(x) Transform----")

# Draw the row: transformed panels on either side of the linear reference.
grid.arrange(grobs = list(square, mid, sqrt), ncol = 3)
# Row 4, left panel: cubic relationship. The noise added to y has a mean of 25%
# and a standard deviation of 12.5% of the largest noiseless y value.
left_data <- within(data.frame(x = lin_data$x), {
  y <- x^3 # Change here
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
# `fit` always holds the most recent model; `cube_fit` keeps this one by name.
fit <- lm(y ~ x, data = left_data)
cube_fit <- fit
cube <- ggplot(data = left_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = x^3 \n ---x^3 Transform--->") +
  theme(plot.title = element_text(hjust = 0.8))

# Row 4, right panel: cube-root relationship with the same noise recipe.
right_data <- within(data.frame(x = lin_data$x), {
  y <- x^(1/3) # Change here
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
fit <- lm(y ~ x, data = right_data)
cubic_root_fit <- fit
cubic_root <- ggplot(data = right_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = x^(1/3) \n <---x^(1/3) Transform----")

# Draw the row: transformed panels on either side of the linear reference.
grid.arrange(grobs = list(cube, mid, cubic_root), ncol = 3)
# Row 5, left panel: sine relationship. The noise added to y has a mean of 25%
# and a standard deviation of 12.5% of the largest noiseless y value.
left_data <- within(data.frame(x = lin_data$x), {
  y <- sin(x) # Change here
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
# `fit` always holds the most recent model; `sin_fit` keeps this one by name.
fit <- lm(y ~ x, data = left_data)
sin_fit <- fit
# NOTE: the plot objects `sin` and `cos` share their names with base functions.
# Calls such as sin(x) still work because R skips non-function bindings when
# resolving a function call.
sin <- ggplot(data = left_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = sin(x) \n ---sin(x) Transform--->") +
  theme(plot.title = element_text(hjust = 0.8))

# Row 5, right panel: cosine relationship with the same noise recipe.
right_data <- within(data.frame(x = lin_data$x), {
  y <- cos(x) # Change here
  y <- y + rnorm(1000, mean = 0.25 * max(y), sd = 0.125 * max(y))
})
fit <- lm(y ~ x, data = right_data)
cos_fit <- fit
cos <- ggplot(data = right_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "loess", span = 0.3, color = "red") +
  geom_smooth(method = "lm", color = "blue") +
  labs(title = "y = cos(x) \n <---cos(x) Transform----")

# Draw the row: transformed panels on either side of the linear reference.
grid.arrange(grobs = list(sin, mid, cos), ncol = 3)
# Assemble each three-panel row as a grob (without drawing it), keeping the
# linear reference plot `mid` in the centre column of every row.
one   <- arrangeGrob(grobs = list(line,   mid, recip),      ncol = 3)
two   <- arrangeGrob(grobs = list(log,    mid, exp),        ncol = 3)
three <- arrangeGrob(grobs = list(square, mid, sqrt),       ncol = 3)
four  <- arrangeGrob(grobs = list(cube,   mid, cubic_root), ncol = 3)
five  <- arrangeGrob(grobs = list(sin,    mid, cos),        ncol = 3)

# Stack the five rows into a single one-column figure and draw it.
grid.arrange(grobs = list(one, two, three, four, five), ncol = 1)
See what each non-linear relationship looks like on a residuals-vs-fitted plot. The following code was run for every transformation:
# Build a residuals-vs-fitted plot for one fitted linear model.
#
# model: an `lm` fit (one of the *_fit objects created earlier in the script).
# title: plot title naming the relationship that generated the data.
# Returns a ggplot object.
resid_plot <- function(model, title) {
  # Extract fitted values and residuals from `model` itself so the plotted data
  # always belongs to the model being shown, not to whichever fit ran last.
  resid_data <- data.frame(
    fitted_value = fitted(model),
    residual = residuals(model)
  )
  ggplot(resid_data, aes(x = fitted_value, y = residual)) +
    geom_point() +
    # method = "loess" is stated explicitly so that `span` takes effect; the
    # automatic method choice can select a non-loess smoother for samples of
    # this size, in which case `span` is ignored.
    geom_smooth(
      method = "loess", color = "red", lwd = 0.5, se = FALSE, span = 0.5
    ) +
    # theme_bw() is a complete theme and replaces earlier theme settings, so it
    # must come before the title-centring tweak rather than after it.
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5)) +
    labs(title = title, x = "Fitted Values", y = "Residuals")
}

# One residual plot per transformation. Every *_resid object used in the grid
# below is created here, so the grid no longer refers to undefined objects.
line_resid       <- resid_plot(line_fit,       "y=x")
recip_resid      <- resid_plot(recip_fit,      "y=1/x")
log_resid        <- resid_plot(log_fit,        "y=log(x)")
exp_resid        <- resid_plot(exp_fit,        "y=e^x")
square_resid     <- resid_plot(square_fit,     "y=x^2")
sqrt_resid       <- resid_plot(sqrt_fit,       "y=sqrt(x)")
cube_resid       <- resid_plot(cube_fit,       "y=x^3")
cubic_root_resid <- resid_plot(cubic_root_fit, "y=x^(1/3)")
sin_resid        <- resid_plot(sin_fit,        "y=sin(x)")
cos_resid        <- resid_plot(cos_fit,        "y=cos(x)")

# Same layout as the scatter-plot figure: the linear model's residuals sit in
# the centre column of every row as the reference.
one <- arrangeGrob(line_resid, line_resid, recip_resid, ncol = 3)
two <- arrangeGrob(log_resid, line_resid, exp_resid, ncol = 3)
three <- arrangeGrob(square_resid, line_resid, sqrt_resid, ncol = 3)
four <- arrangeGrob(cube_resid, line_resid, cubic_root_resid, ncol = 3)
five <- arrangeGrob(sin_resid, line_resid, cos_resid, ncol = 3)
grid.arrange(one, two, three, four, five, ncol = 1)